# Clear workspace
rm(list = ls())
library(bigrquery)
library(stringr)
library(tidyverse)
library(dplyr)
library(lme4)
library(MuMIn)
library(scales)
# Resolve the Google Cloud project id from the local gcloud CLI.
# The id is taken as the first space-delimited token on the second line of the
# `gcloud projects list` output (presumably the first line is a header row --
# TODO confirm against the installed gcloud version).
#
# This is deliberately best-effort: when gcloud cannot be run, `projectid` is
# left as NA so the rest of the script can still proceed without a project id;
# the failure is reported explicitly instead of being hidden in a try-error.
response <- tryCatch(
  system("~/google-cloud-sdk/bin/gcloud projects list --quiet", intern = TRUE),
  error = function(e) {
    warning(
      "Could not run `gcloud projects list`: ", conditionMessage(e),
      call. = FALSE
    )
    character(0)
  }
)
projectid <- if (length(response) >= 2) {
  strsplit(response[2], " ")[[1]][1]
} else {
  warning("No project found in the gcloud output; projectid is NA", call. = FALSE)
  NA_character_
}
# Download the hotspot metrics for one species pool from BigQuery.
#
# Every `##POOL_NAME##` placeholder in the query template is replaced with
# `poolname`, the resulting SQL is printed, the query is run against the
# script-level `projectid`, and the result table is downloaded and returned.
#
# poolname: text substituted for each `##POOL_NAME##` placeholder.
# Returns: the value of `bq_table_download()` for the query result.
create_dataset <- function(poolname) {
  query_template <- "SELECT
##POOL_NAME##.percentage_of_regional_pool_present,
##POOL_NAME##.difference_from_locality_trait_gravity,
##POOL_NAME##.percentage_of_niches_present,
##POOL_NAME##.percentage_of_niches_2_present,
##POOL_NAME##.percentage_of_niches_3_present,
latitude,
longitude,
percentage_landcover_5km.closed_forest_total AS closed_forest,
percentage_landcover_5km.cultivated,
percentage_landcover_5km.herbaceous_vegetation,
percentage_landcover_5km.herbaceous_wetland,
percentage_landcover_5km.open_forest_total AS open_forest,
percentage_landcover_5km.permanent_water,
percentage_landcover_5km.shrubs,
percentage_landcover_5km.urban,
percentage_landcover_5km.elevation.mean AS mean_elevation,
percentage_landcover_5km.elevation.delta AS elevation_delta,
average_population_density.within_5km AS average_population_density,
urban_area.name AS city_name,
urban_area.location.continent,
urban_area.ecosystem.realm,
urban_area.ecosystem.biome.biome_name AS biome,
urban_area.country_economy.gdp_estimate_thousand_dollars_per_person AS national_gdp_estimate_thousand_dollars_per_person,
urban_area.country_economy.income_group AS national_income_group,
locality_id
FROM model.urban_hotspot
JOIN model2.all_species USING(locality_id, city_id)
JOIN model.urban_area USING (city_id)"

  sql <- str_replace_all(query_template, "##POOL_NAME##", poolname)

  # Echo the final SQL so the exact query that was run is visible in the log.
  print(sql)

  query_result <- bq_project_query(projectid, sql)
  bq_table_download(query_result)
}
# Load the hotspot metrics for one species pool, using a CSV file as a cache.
#
# The cache file name is 'download_data__output__hotspot_metrics_<poolname>.csv'
# in the working directory. When it does not exist, the data is fetched with
# `create_dataset()` and written there first; the data is then always read back
# from the CSV.
#
# After loading, the categorical columns are converted to factors (continent
# and realm use "Europe" and "Palearctic" as reference levels) and `*_scaled`
# copies of the numeric predictors are added, each rescaled to [0, 1] over the
# finite, non-missing range of that column.
#
# poolname: species pool name, substituted into the cache file name.
# Returns: the loaded data with the factor and `*_scaled` columns added.
load_dataset <- function(poolname) {
  # Rescale a numeric vector to [0, 1] over its finite, non-missing range.
  scale_01 <- function(x) {
    rescale(x, to = c(0, 1), from = range(x, na.rm = TRUE, finite = TRUE))
  }

  filename <- str_replace('download_data__output__hotspot_metrics_##POOL_NAME##.csv', '##POOL_NAME##', poolname)
  if (!file.exists(filename)) {
    data <- create_dataset(poolname)
    write_csv(data, filename)
  }

  # Always read from the CSV so cached and freshly downloaded data take the
  # same parsing path.
  data <- read_csv(filename)

  data$city_name <- as.factor(data$city_name)
  data$continent <- relevel(as.factor(data$continent), ref = "Europe")
  data$realm <- relevel(as.factor(data$realm), ref = "Palearctic")
  data$biome <- as.factor(data$biome)
  data$national_income_group <- as.factor(data$national_income_group)

  data$mean_elevation_scaled <- scale_01(data$mean_elevation)
  data$elevation_delta_scaled <- scale_01(data$elevation_delta)
  data$average_population_density_scaled <- scale_01(data$average_population_density)
  data$national_gdp_estimate_thousand_dollars_per_person_scaled <-
    scale_01(data$national_gdp_estimate_thousand_dollars_per_person)
  data$latitude_scaled <- scale_01(data$latitude)
  data$longitude_scaled <- scale_01(data$longitude)

  # Take the absolute value BEFORE rescaling: abs() of the already [0, 1]
  # scaled latitude is a no-op and would only duplicate latitude_scaled.
  data$absolute_latitude_scaled <- scale_01(abs(data$latitude))

  data
}
# Load the hotspot metrics for each of the four species pools and print each
# result.
#
# Console output that had been pasted into the script is kept here as a
# comment so the file parses as R. readr reported the same column
# specification for all four loads:
#
#   cols(
#     .default = col_double(),
#     city_name = col_character(),
#     continent = col_character(),
#     realm = col_character(),
#     biome = col_character(),
#     national_income_group = col_character(),
#     locality_id = col_character()
#   )
#   Use `spec()` for the full column specifications.
merlin <- load_dataset("merlin")
merlin

birdlife <- load_dataset("birdlife")
birdlife

both <- load_dataset("both")
both

either <- load_dataset("either")
either
# Estimate the linear population trend for a single city.
#
# Reads the fifteen five-yearly population fields `pop1950` ... `pop2020` from
# `city_row` and fits an ordinary least-squares line of population on year.
#
# city_row: a list or one-row data frame holding the `popYYYY` fields.
# Returns: the fitted slope (the coefficient named "years").
population_growth <- function(city_row) {
  years <- seq(1950, 2020, by = 5)
  population <- unlist(city_row[paste0("pop", years)], use.names = FALSE)
  fit <- lm(population ~ years)
  # The second coefficient is the slope; the first is the intercept.
  coef(fit)[2]
}
# Load the per-city input data, convert its categorical columns to factors and
# add a `population_growth` column holding the slope returned by
# `population_growth()` for each row.
city_data <- read_csv("download_data__input__city_data.csv")
# Console output that had been pasted into the script, kept as a comment so
# the file parses as R. readr reported this column specification:
#
#   cols(
#     .default = col_double(),
#     name = col_character(),
#     city_includes_estuary = col_logical(),
#     region_100km_includes_estuary = col_logical(),
#     region_50km_includes_estuary = col_logical(),
#     region_20km_includes_estuary = col_logical(),
#     biome_name = col_character(),
#     realm = col_character()
#   )
#   Use `spec()` for the full column specifications.

factor_columns <- c(
  "realm",
  "city_includes_estuary",
  "region_100km_includes_estuary",
  "region_50km_includes_estuary",
  "region_20km_includes_estuary",
  "biome_name"
)
for (column in factor_columns) {
  city_data[[column]] <- as.factor(city_data[[column]])
}

# One regression per city row. seq_len() keeps this correct for a zero-row
# table (1:nrow() would iterate over c(1, 0)), and vapply() builds the column
# in one pass instead of rewriting a tibble row on every iteration.
city_data$population_growth <- unname(vapply(
  seq_len(nrow(city_data)),
  function(i) population_growth(city_data[i, ]),
  numeric(1)
))

city_data